Model and Simulations

This is an analysis of the PTSD model “complete” simulations (“Simulation 3”).

Trajectories to simulate

One of the goals of the model is to simulate the recovery trajectories in PTSD. The classic distinction is between four different trajectories: Resilient, Recovery, Chronic, and Delayed. These trajectories can be stylized as follows:

# Time axis in days: 20 days of baseline before the traumatic event (day 0)
# and 60 days of follow-up after it.
time <- -20:60

# Four stylized recovery trajectories; every curve is forced to zero for the
# pre-event period (time <= 0).
resilient <- numeric(length(time))
chronic   <- ifelse(time <= 0, 0, tanh(time / 2))
delayed   <- ifelse(time <= 0, 0, time^2 / max(time^2))
recovery  <- ifelse(time <= 0, 0,
                    tanh(time / 2) + ((60 - time)^2 / max(time^2) - 1))

# Collect the stylized curves into a wide table (one column per trajectory) …
wideal <- tibble(Day = time, 
                Chronic = chronic,
                Delayed = delayed,
                Recovery = recovery,
                Resilient = resilient)
        
# … then reshape to long format so ggplot can map Trajectory to color/fill.
ideal <- wideal %>%
  pivot_longer(cols=c("Chronic", "Delayed", "Resilient", "Recovery"),
               values_to = "CTraumatic",
               names_to = "Trajectory")

# Fix the factor level order so the legend/color assignment is consistent
# with the later figures.
ideal$Trajectory <- factor(ideal$Trajectory,
                           levels = c("Recovery", "Delayed", "Resilient",  "Chronic"))

# Plot the four theoretical trajectories. Axis tick labels are blanked
# because the units are arbitrary (these are stylized curves, not data).
ggplot(ideal, 
       aes(x=Day, 
           y=CTraumatic, 
           col=Trajectory, 
           fill=Trajectory)) +
  #geom_smooth(method="loess", span=0.2) +
  # NOTE(review): `size` for lines is superseded by `linewidth` in
  # ggplot2 >= 3.4 — confirm the ggplot2 version in use.
  geom_line(size=2, alpha=0.75) +
  ggtitle("Theoretical Recovery Trajectories") +
  ylab("Number of Memory Intrusions") +
  # Vertical line marking the traumatic event (just before Day 0).
  geom_vline(data=tibble(), 
             aes(xintercept=-0.35)) +
  scale_color_d3() +
  theme_pander()  +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

Underlying memory model

The underlying memory model is John Anderson’s ACT-R.

# Simulate ACT-R

# Power-law decay of a single memory trace (ACT-R style base-level
# activation of one encoding event): (time - t0)^(-rate) after the trace is
# created at t0, NA at or before creation.
#
# Args:
#   time: time point(s) at which to evaluate the trace (vectorized).
#   t0:   creation time of the trace (default 0).
#   rate: decay exponent (default 0.5, ACT-R's conventional d).
#
# Returns: numeric vector the same length as `time`, NA where time <= t0.
decay <- function(time, t0 = 0, rate = 0.5) {
  ifelse(time <= t0, NA, (time - t0)^(-rate))
}

# Kept for backward compatibility with existing calls; decay() is now
# vectorized itself, so Vectorize() is redundant but harmless.
vdecay <- Vectorize(decay)

time <- seq(1, 100, 1)

# Four traces created at different moments; computed once and reused below
# (the original recomputed the same values inside data.frame()).
t1 <- vdecay(time, t0 = 1)
t2 <- vdecay(time, t0 = 20)
t3 <- vdecay(time, t0 = 55)
t4 <- vdecay(time, t0 = 65)

df <- data.frame(Time = time,
                 trace1 = t1,
                 trace2 = t2,
                 trace3 = t3,
                 trace4 = t4)


# Reshape the four trace columns into long format (one row per Time x Trace)
# for the faceted activation plot below.
ldf <- df %>%
  # mutate(Total = if_else(is.na(trace1), 0, trace1) +
  #          if_else(is.na(trace2), 0, trace2) +
  #          if_else(is.na(trace3), 0, trace3) +
  #          if_else(is.na(trace4), 0, trace4))  %>%
  pivot_longer(cols = c("trace1", "trace2", "trace3", "trace4"),
               names_to = "Trace", values_to="Activation")

# Human-readable legend labels.
ldf$Trace <- fct_recode(ldf$Trace, 
                        "Trace 1" = "trace1",
                        "Trace 2" = "trace2",
                        "Trace 3" = "trace3",
                        "Trace 4" = "trace4")

# Tag these rows as the individual-trace facet (vs. the "Memory" facet
# added below).
ldf <- ldf %>%
  add_column(Source="Individual Traces")

# Traces that have not been created yet (NA) contribute nothing to the sum.
t1[is.na(t1)] <-0
t2[is.na(t2)] <-0
t3[is.na(t3)] <-0
t4[is.na(t4)] <-0
# Total activation = log of the summed trace odds (ACT-R base-level
# equation). Where the sum is 0 this yields -Inf, which ggplot later drops
# (see the "Removed ... rows" warning under the plot).
total <- log(t1+t2+t3+t4)

totaldf <- tibble(Time=time, 
                  Activation = total,
                  Trace = "Memory",
                  Source="Memory")

# Stack the aggregate "Memory" rows under the individual traces, and order
# the facets with Memory on top.
ldf <- rbind(ldf, totaldf)

ldf$Source <- factor(ldf$Source, levels = c("Memory", "Individual Traces"))

# Two-facet figure: aggregate memory activation on top, the four individual
# traces below (free y scales since their ranges differ).
ggplot(ldf, 
       aes(x=Time, y=Activation, col=Trace)) +
  geom_line(size=1,
            alpha=1) +
            #linetype="dashed") +
  # stat_summary(geom="line", fun = sum, 
  #              col="black", 
  #              alpha=0.5,
  #              position = position_dodge()) +
  # #scale_y_log10() +
  facet_wrap(~ Source, ncol=1,
             scales = "free_y") +
  #ylim(0, 1.6) +
  #ylab(expression(paste("Availability of Memory ", italic(m))))+
  ylab("Activation") +
  scale_color_viridis("", option="plasma", discrete=T, end = .8, direction=-1) +
  theme_pander()
## Warning: Removed 141 row(s) containing missing values (geom_path).
## Warning: Removed 141 row(s) containing missing values (geom_path).

Neurological interpretation

# Map cortical regions to the model components they are interpreted as
# implementing (three regions jointly code "Context").
brain_model <- tibble(
  region = c("parahippocampal", "entorhinal", "medial orbitofrontal",
             "superior frontal", "paracentral", "precuneus"),
  component = c("LTM", "Emotional Intensity", "Retrieval Control",
                rep("Context", 3))
)

#brain_model$component <- as_factor(brain_model$component)

# brain_model %>%
#  group_by(component) %>%
  # ggplot(aes(fill = component)) +
  # geom_brain(atlas = dk, 
  #            col="white",
  #            show.legend = T
  #            ) +
  # labs(fill="Model\nComponent") +
  # #scale_fill_lancet(na.value="lightgrey") +
  # scale_fill_manual(na.value="lightgrey", 
  #                    values=c("dodgerblue2", "goldenrod1", "firebrick3", 
  #                             "lightskyblue3", "lightskyblue3", "lightskyblue3"),
  #                    breaks = brain_model$component) +
  # coord_sf(xlim = c(750, 1050)) +
  # theme_brain2(text.family = "sans",
  #              text.colour="black") 

Event Distribution

The model lives in a simulated world in which memories are automatically retrieved in response to changes in the environment. These changes, called events, occur probabilistically, with Gamma-distributed inter-event times at a predetermined frequency.

The distribution of Gamma events is this:

  • Frequency: 2.1, 175 (shape, scale)
  • Rumination, 6, 100
# One simulated day in minutes.
t <- 0:(24*60)
# Gamma densities for event and rumination retrievals; rate = 1/scale, so
# these match the (shape, scale) pairs listed above: (2.1, 175) and (6, 100).
E <- dgamma(t, shape=2.1, rate=1/175)
R <- dgamma(t, shape=6, rate=1/100)

# Long-format table for plotting both densities together.
dists <- data.frame(Time=t, Events = E, Rumination = R) %>%
  pivot_longer(names_to = "Type", 
               values_to = "Probability",
               cols=c("Events", "Rumination"))

ggplot(dists, aes(x=Time, y=Probability, col=Type, fill=Type)) +
  geom_line() +
  geom_ribbon(data=dists, aes(x=Time, ymax=Probability, ymin=0), alpha=0.25) +
  scale_x_continuous(breaks=60*c(0, 4, 8, 12, 16, 20),
                     limits=c(0, 24*60),
                     labels=c("8:00am", 
                              "12:00pm",
                              "4:00pm",
                              "8:00pm",
                              "12:00am",
                              "4:00am")) +
  scale_color_aaas() +
  scale_fill_aaas() +
  ggtitle("Probability of Retrievals During the Day") +
  theme_pander()

Load and Transform the Data

First, we are going to load the “aggregated” version of the simulations, with the events already aggregated by day.

# Guard flag: the expensive raw-simulation load/aggregation runs only on
# demand; otherwise the cached aggregated CSV is read below.
# (Note the variable name carries a typo, "AGGREGAGATED"; kept as-is since
# both uses are consistent.)
CREATE_AGGREGAGATED = F

if (CREATE_AGGREGAGATED) {
  d <- read_csv("../simulations/simulations3.csv")
  #d <- read_csv("simulations3.csv", col_types = cols())
  d <- d %>%
    # Convert simulation time (seconds) to days and hours; the -100 offset
    # presumably places the traumatic event at Day 0 — TODO confirm against
    # the simulation setup.
    mutate(Day = floor(Time / (60*60*24)) - 100,
           Hour = floor((d$Time / 3600) %% 24),
           RuminationFrequency=as_factor(RuminationFrequency))
  # NOTE(review): positional rename of column 18 is fragile — breaks
  # silently if the simulation output adds/reorders columns.
  names(d)[18] <- "W"
  
  d <- d %>% 
    dplyr::select(Run, Day, PTEV, Gamma, PTES, W, 
                  NumAttributes, RuminationFrequency, 
                  MemoryEntropy, ChunkSimilarity, Traumatic, ChunkV) %>%
    filter(Day > -20)
  
  # Aggregate to one row per Run x Day x parameter combination.
  a <- d  %>%
    group_by(Run, Day, PTEV, PTES, Gamma, 
             NumAttributes, RuminationFrequency, W) %>%
    summarise(MemoryEntropy = mean(MemoryEntropy),
              ChunkSimilarity = mean(ChunkSimilarity),
              PTraumatic = mean(Traumatic),
              CTraumatic = sum(Traumatic),
              ChunkV = mean(ChunkV))
  
  write_csv(x=a, file = "../simulations/simulations3_aggregated2.csv")
}

# Load the cached, gzip-compressed aggregate.
a <- read_csv(gzfile("../simulations/simulations3_aggregated2.csv.gz"),
              col_types = cols())

Then, we are going to rename the simulation variables to make them consistent with the names used in published papers:

# Rename the simulation variables to the symbols used in the published
# papers (single rename call; identical result to chained renames).
a <- a %>%
  rename(I = PTEV,
         gamma = Gamma,
         A = NumAttributes,
         C = PTES,
         R = RuminationFrequency)

Recovery Trajectories

To identify a recovery trajectory, we need to first define a classification function. Here, we are going to use the following algorithm:

First the model calculates three average values:

  1. The mean probability P(baseline) of retrieving intrusive memories in the 10 days preceding the PTE, or baseline period. By definition, this is always zero.

  2. The mean probability P(acute) of retrieving intrusive memories in the 10 days following the PTE, or during the acute period.

  3. The mean probability P(chronic) of retrieving intrusive memories in the last ten days of the second month after the PTE (i.e., days 51-60), or the chronic period.

Then, the model calculates two statistical tests:

  1. A t-test between the baseline and acute period, or acute test; and
  2. A t-test between the acute-period and the chronic period, or chronic test.

And finally we apply the following decision tree:

  1. If the acute test is significant at p < .05 (Pacute > Pbaseline), then

    1.1. If the chronic test is also significant at p < .05, and Pchronic > Pacute, we classify the trajectory as Delayed.

    1.2. If the chronic test is significant at p < .05 but P(chronic) < P(acute), then we classify the trajectory as Recovery;

    1.3. If the chronic test is non-significant, then P(chronic) ~ P(acute) , and we classify the trajectory as Chronic.

  2. If, instead, the acute test is not significant and P(acute) ~ P(baseline), then:

    2.1. If the chronic test is significant at p < .05, then P(chronic) > P(acute), and we classify the trajectory as Delayed.

    2.2 If the chronic test is also not significant, then P(chronic) ~ P(baseline) ~ P(acute), and we classify the trajectory as Resilient.

# Classifies people based on trajectory:
# Chronic = 1
# Recovery =2
# Delay    =3
# resilient= 4
#
classify <- function(days) {
  g1 <- days[10:19]
  g2 <- days[21:30]
  g3 <- days[70:79]
  t12 <- t.test(g1, g2)
  t23 <- t.test(g2, g3)
  category <- 0
  if (!is.na(t12$p.value) & t12$p.value < 0.05) {
    # t1 < t2
    if (!is.na(t23$p.value) & t23$p.value < 0.05) {
      # t2 <> t3

      if (!is.na(t23$statistic) & t23$statistic > 0) {
        # t1 < t2, t2 > 3
        category <- "Recovery" #2  # Recovery
      } else {
        # t1 < t2, t2 < t3
        category <- "Delayed" # 3 # Delayed
      }
    } else {
      # t1 < t2, t2 = t3
      category <- "Chronic" # 1 # Chronic
    }
  } else {
    # t1 == t2
    if (!is.na(t23$p.value) & t23$p.value < 0.05) {
      # t1 == t2, t2 < t3
      category <- "Delayed" #3 # Delayed
    } else {
      # t1 == t2, t2 = t3
      category <- "Resilient" # 4 # Resilient
    }
  }
  category
}

# Classify every run (per parameter combination) into a trajectory from its
# daily PTraumatic series.
patterns <- a %>%
  group_by(Run, I, C, A, R, W, gamma) %>%
  dplyr::summarize(Trajectory = classify(PTraumatic))
## `summarise()` has grouped output by 'Run', 'I', 'C', 'A', 'R', 'W'. You can override using the `.groups` argument.
patterns <- patterns %>% ungroup

Finally, let’s assign each data point in the main averages with the corresponding trajectory, and re-order the trajectories in order of severity:

# Attach each run's trajectory label to every daily row, then order the
# trajectory factor consistently with the earlier figures.
a <- inner_join(a, patterns)
## Joining, by = c("Run", "I", "C", "gamma", "A", "R", "W")
a$Trajectory <- factor(a$Trajectory,
                      levels = c("Recovery", "Delayed", "Resilient",  "Chronic"))

Chronic Probability of Traumatic Intrusions

The other “behavioral” measure is the incidence of traumatic memory intrusions. As an aggregate measure, we will consider only the averages in the “Chronic” period, which we will be taking as indicative of long-term consequences of the PTSD.

# Aggregate intrusion measures per run over the post-event period.
# NOTE(review): Day > 1 covers the whole post-event course, not only the
# "Chronic" window (days 51-60) described in the text — confirm intent.
traumatic <- a %>%
  filter(Day > 1) %>%
  group_by(Run, I, C, R, W, gamma) %>%
  dplyr::summarize(PTraumatic = mean(PTraumatic),
            CTraumatic = sum(CTraumatic))
## `summarise()` has grouped output by 'Run', 'I', 'C', 'R', 'W'. You can override using the `.groups` argument.

At this point, we can visualize the mean aggregated timecourse of intrusions for each trajectory.

# Grey palette for the period-shading rectangles in the next figure.
K = rev(brewer.pal(5, "Greys"))

# Hard-coded grand total of runs x parameter combinations; used as the
# denominator for trajectory proportions — TODO confirm it matches the
# simulation grid.
TOTAL <- 28800  # Param combos

# One row per run (Day == 30 picks a single day per run) to count
# trajectory frequencies and mean intrusion levels for labeling.
traj_total <- a %>%
  filter(Day == 30) %>%
  group_by(Trajectory) %>%
  summarise(N = length(CTraumatic),
            Prop = length(CTraumatic)/TOTAL,
            MeanC = mean(CTraumatic))

# Mean intrusion timecourse per trajectory (I == 1 runs excluded), with
# shaded Base/Acute/Chronic windows and proportion labels.
ggplot(filter(a, I != 1), 
       aes(x=Day, y=CTraumatic, col=Trajectory, fill=Trajectory)) +
  ggtitle("Memory Intrusions by Trajectory") +
  #geom_vline(data=props, aes(xintercept=-0.5)) +
  # Event marker at Day 0.
  annotate("segment", x=-0, xend=-0,
           y=-Inf, yend=Inf, col="black", lty=1, size=1) +
  # Shaded analysis windows: chronic (50-60), acute (1-10), baseline (-10..-1).
  annotate("rect", xmin=50, xmax=60,
           ymin=-Inf, ymax=Inf, fill=K[1], alpha=0.3)+
  annotate("rect", xmin=1, xmax=10,
           ymin=-Inf, ymax=Inf, fill=K[3], alpha=0.3)+
  annotate("rect", xmin=-10, xmax=-1,
           ymin=-Inf, ymax=Inf, fill=K[4], alpha=0.3)+
  
  annotate("text", x= -5.5, y=15, label="Base")+
  annotate("text", x= 5.5, y=15, label="Acute")+
  annotate("text", x= 55, y=15, label="Chronic")+
  
  # Percentage of runs in each trajectory, placed at each curve's Day-30 mean.
  geom_text(data=traj_total, aes(x= 30, y = MeanC, 
                                 label=percent(Prop, accuracy = 0.1)),
            vjust=-.5, show.legend=F) +
  
  stat_summary(fun.data = mean_se, geom="line") +
  stat_summary(fun.data = mean_cl_normal, 
               geom="ribbon", alpha = 0.25, col=NA) +
  coord_cartesian(ylim=c(0, 15)) +
  ylab("Total Number of Memory Intrusions") +
  scale_color_d3() +
  scale_fill_d3() +
  theme_pander()

The results indicate that our classification was correct; the trajectories behave as planned.

Finding a baseline to compare the model to existing literature

Since we have no idea how typical the model parameters are of the existing patients and their cases, we need to provide some sort of baselines. To do so, we will use the proportion of trajectories that were given by Galatzer-Levy and Bonanno in their meta-analysis. They are:

  • Resilient, 0.657
  • Recovery, 0.208
  • Chronic, 0.106
  • Delayed, 0.089

Note that they do not normalize to one, due to the lack of some trajectories in some studies. To use them, we will need to make sure to normalize them first, which can be done at the time of testing.

# Trajectory proportions from Galatzer-Levy & Bonanno's meta-analysis
# (they do not sum to one; rescaled where needed downstream).
T_observed <- c(
  Resilient = 0.657,
  Recovery  = 0.208,
  Chronic   = 0.106,
  Delayed   = 0.089
)

We can now plot the percentages and the patterns by trajectory:

# Count trajectories per parameter combination; Prop divides by a
# hard-coded 50 — NOTE(review): assumes exactly 50 runs per cell.
props <- patterns %>%
  group_by(Trajectory, 
           I, 
           gamma, 
           C, 
           W,
           R,
           #A,
           ) %>%
  dplyr::summarize(N = length(Trajectory)) %>%
  dplyr::mutate(Prop = N/50)
## `summarise()` has grouped output by 'Trajectory', 'I', 'gamma', 'C', 'W'. You can override using the `.groups` argument.
# Wide table: one row per parameter combination, one count column per
# trajectory (missing trajectories filled with 0).
props %>%
  pivot_wider(id_cols=c(I, 
                        gamma, 
                        C,
                        W,
                        R,
                        #A,
                        ), 
              names_from = Trajectory,
              values_from = N,
              values_fill=0) -> test

# Chi-squared statistic comparing observed trajectory counts against
# expected proportions (rescaled to sum to 1).
#
# Args:
#   vals:     observed counts, in the same order as `expected`.
#   expected: expected proportions; defaults to the meta-analytic
#             T_observed defined above.
mychi <- function(vals, expected = T_observed) {
  chisq.test(vals,
             p = expected,
             rescale.p = TRUE)$statistic
}

# Pearson correlation between observed trajectory counts and the expected
# proportions (defaults to the meta-analytic T_observed defined above).
mycorr <- function(vals, expected = T_observed) {
  cor(vals, expected)
}

# RMSE between observed trajectory counts and the counts expected if
# `n_runs` runs followed the expected proportions exactly.
#
# Args:
#   vals:     observed counts, in the same order as `expected`.
#   expected: expected proportions (default: the global T_observed).
#   n_runs:   runs per parameter combination (default 50, matching the
#             hard-coded value in the original).
myrmse <- function(vals, expected = T_observed, n_runs = 50) {
  v <- n_runs * (expected / sum(expected))
  sqrt(mean((vals - v)^2))
}


# Back to long format (one row per parameter combination x trajectory) so
# the fit statistics can be computed per combination.
lprops <- test %>%
  pivot_longer(cols=c("Resilient", "Recovery", "Chronic", "Delayed"),
               names_to = "Trajectory",
               values_to = "N")

# Consistent trajectory ordering for plots.
lprops$Trajectory <- factor(lprops$Trajectory,
                      levels = c("Recovery", "Delayed", "Resilient",  "Chronic"))

# Suppresses X-squared approximation warnings
old.warn <- options()$warn
options(warn = -1)

atest <- lprops %>%
  group_by(I, 
           gamma, 
           C, 
           W,
           R,
           #A,
           ) %>%
  summarise(Chi = mychi(N), r=mycorr(N), RMSE=myrmse(N))
## `summarise()` has grouped output by 'I', 'gamma', 'C', 'W'. You can override using the `.groups` argument.
# resets warnings
options(warn = old.warn)

test <- inner_join(test, atest)
## Joining, by = c("I", "gamma", "C", "W", "R")

Now, we can identify the baseline as the condition with the minimum \(\chi^2\) statistic (or the minimum RMSE; they are the same)

This shows how correlation coefficient and \(\chi^2\) are related, while the RMSE does not really capture either of them:

ggplot(test, aes(x=Chi, y=r, col=RMSE)) + 
  geom_point(size=4, alpha=0.5) + 
  scale_color_viridis(option="plasma")+
  ylab(expression(italic(r))) +
  xlab(expression(chi^2)) +
  ggtitle("Relationship between Error Measures in Trajectories") +
  theme_pander()

Now, let’s identify a “baseline” condition—the one that most resembles the distribution of trajectories identified by Galatzer-Levy.

# The "baseline" condition: the parameter combination whose trajectory
# distribution best matches the meta-analysis (minimum chi-squared).
baseline <- test %>%
  filter(Chi == min(test$Chi))

baseline %>%
  kable(digits=3) %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
I gamma C W R Chronic Delayed Recovery Resilient Chi r RMSE
40 0.9 0.25 8 0 15 3 15 67 7.464 0.985 18.875

And here is a visual representation of the four trajectories at that baseline (smoothed, because there is too much day-to-day variance in this small sample).

# Daily data restricted to the single best-fitting parameter combination.
baseline_a <- a %>%
  filter(I == baseline$I,
         W == baseline$W,
         C == baseline$C,
         R == baseline$R,
         gamma == baseline$gamma,
         )

# Trajectory counts at this baseline; Day == 15 selects one row per run,
# and the /100 denominator assumes 100 runs in the cell — TODO confirm.
base_total <- baseline_a %>%
  filter(Day == 15) %>%
  group_by(Trajectory) %>%
  summarise(N = length(CTraumatic),
            Prop = length(CTraumatic)/100,
            MeanC = mean(CTraumatic))

         
ggplot(baseline_a, 
       aes(x=Day, y=CTraumatic, col=Trajectory, fill=Trajectory)) +
  geom_smooth(method="loess", span=0.2) +
  ggtitle("Trajectories for Best-Fitting Parameters") +
  scale_color_d3() +
  # NOTE(review): the hand-placed label x positions assume all four
  # trajectories are present in base_total, in this order.
  geom_text(data=base_total, aes(x= c(25, 10, 20, 10), y = MeanC, 
                                 label=percent(Prop, accuracy = 0.1)),
            vjust=-.5, show.legend=F) +
  ylab("Total Number of Memory Intrusions") +
  # Event marker; using props as data draws one (identical) line per row.
  geom_vline(data=props, aes(xintercept=-0.25)) +
  scale_fill_d3() +
  theme_pander() 
## `geom_smooth()` using formula 'y ~ x'

The Chronic and Delayed trajectories, being the least common, exhibit significant ups and downs, hence the need for Loess smoothing. Nonetheless, the trajectories remain visible and clearly identifiable.

But how good is the baseline condition, in terms of being close to Galatzer-Levy’s estimated pooled values? We can just examine the significance of the \(\chi^2\) test.

# Goodness-of-fit of the baseline condition against the meta-analytic
# proportions (rescaled to sum to 1).
baseline_vals <- baseline[c("Resilient", "Recovery", "Chronic", "Delayed")]
chi <- chisq.test(baseline_vals, p=T_observed, rescale.p = T)

The result is not too bad—a bit dissimilar, but not too far either, and not statistically significant (\(\chi^2\) = 7.463523, p = 0.058503).

The main difference is that the delayed onset trajectory has a higher proportion than in the review. However, Galatzer-Levy and Bonanno themselves note that the resilient (M = 0.66) and delayed onset (M = 0.099) trajectories have a higher prevalence in studies of single-point events. If these corrected prevalences are taken into account, the chi squared reduces further.

Predictions

Forward prediction

# Probability of each trajectory within every parameter combination;
# the /100 denominator assumes 100 run-days per cell — TODO confirm.
grand <- a %>%
  group_by(W, I, C, gamma, R, Trajectory) %>%
  summarize(Prob = length(Trajectory) / 100)
## `summarise()` has grouped output by 'W', 'I', 'C', 'gamma', 'R'. You can override using the `.groups` argument.
# Modal (most frequent) trajectory per combination. NOTE(review):
# filter(Prob == max(Prob)) keeps all tied rows, which can duplicate rows
# in the join below.
grand_f <- grand %>%
  group_by(W, I, C, gamma, R) %>%
  filter(Prob == max(Prob)) %>%
  rename(PredictedTrajectory = Trajectory)

# Forward prediction: does each run's actual trajectory match the modal
# trajectory of its parameter cell?
forward_prediction <- full_join(patterns, grand_f) %>%
  mutate(Accuracy = if_else(Trajectory == PredictedTrajectory, 1, 0))
## Joining, by = c("I", "C", "R", "W", "gamma")

Now, visualization

# Forward-prediction accuracy per trajectory (I == 1 runs excluded).
forward_prediction %>%
  filter(I > 1) %>%
  group_by(Trajectory) %>%
  summarise(Accuracy = mean(Accuracy)) 
## # A tibble: 4 × 2
##   Trajectory Accuracy
##   <chr>         <dbl>
## 1 Chronic       0.602
## 2 Delayed       0.212
## 3 Recovery      0.662
## 4 Resilient     0.911
# Overall forward-prediction accuracy.
forward_prediction %>%
  filter(I > 1) %>%
  summarise(Accuracy = mean(Accuracy)) 
## # A tibble: 1 × 1
##   Accuracy
##      <dbl>
## 1    0.701

Visualization

# Align factor levels of observed and predicted trajectories so the
# confusion-matrix axes match.
forward_prediction$Trajectory <- factor(forward_prediction$Trajectory,
                                        levels = c("Resilient", "Recovery", "Chronic", "Delayed"))

forward_prediction$PredictedTrajectory <- factor(forward_prediction$PredictedTrajectory,
                                                 levels = c("Resilient", "Recovery", "Chronic", "Delayed"))

# Cell percentages; the denominator is the grand total of runs, so all
# cells sum to 1 across the whole matrix (not per row).
forward_percentages <- forward_prediction %>%
  filter(I > 1) %>%
  #group_by(I, C, R, W, gamma) %>%
  #mutate(Count = length(Trajectory)) %>%
  ungroup() %>%
  group_by(Trajectory, PredictedTrajectory) %>%
  summarize(Percent = length(Prob)/nrow(filter(forward_prediction, I> 1))) 
## `summarise()` has grouped output by 'Trajectory'. You can override using the `.groups` argument.
  #ungroup() %>%
  #group_by(Trajectory) %>%
  #summarise(Percent = length(Count) / mean(Count))

# Heatmap of the confusion matrix with percentage labels.
ggplot(forward_percentages, aes(x=PredictedTrajectory, y=Trajectory, fill=Percent)) +
  geom_tile(col="white") +
  scale_fill_viridis(option="inferno", end=0.8) +
  ggtitle("Confusion Matrix for Predicted Trajectories") +
  geom_text(aes(label = percent(Percent, .1)), col="white") +
  theme_pander()

Backward Predictions

# Per-day mean and SD of intrusion counts for every post-event parameter
# combination; Condition is a "(I, C, W)" label used for faceting.
intermediate <- a %>%
  filter(Day > 0) %>%
  filter(I != 1) %>%
  mutate(Condition = paste("(", I, 
                           ", ",  C, 
                           ", ", W, ")", sep="")) %>%
  group_by(Day, I, C, gamma, W, R, Condition)  %>%
  summarise(Intrusions=mean(CTraumatic),
            SDIntrusions=sd(CTraumatic),) 
## `summarise()` has grouped output by 'Day', 'I', 'C', 'gamma', 'W', 'R'. You can override using the `.groups` argument.
# Sanity plot: intrusion timecourses by condition, colored by gamma.
ggplot(intermediate, 
       aes(x=Day, y=Intrusions, col=as.factor(gamma))) +
  geom_point() +
  ylim(0, 50) +
  facet_wrap(~ Condition) + #label=label_both) +
  geom_vline(data=props, aes(xintercept=-0.15)) +
  theme_pander()

Now, let’s build the probability table

# Probability of observing `n` intrusions on a given day under a given
# parameter combination, assuming intrusion counts are normally distributed
# around that cell's per-day mean with its per-day SD (both taken from the
# global `intermediate` table).
# NOTE(review): if the filter matches no rows this returns numeric(0), and
# if it matches several, a vector — assumes exactly one row per
# (day, I, C, W, gamma, R) cell.
get_prob_by_day_5 <- function(n, day, Intensity, Context, WM, gammaval, Rval) {
    intermediate %>%
    filter(Day == day,
           W == WM,
           I == Intensity,
           C == Context,
           R == Rval, 
           gamma == gammaval) -> sub
    m <- sub$Intrusions
    sd <- sub$SDIntrusions
    dnorm(n, m, sd)
}

# Guard flag: building the full likelihood table is very expensive, so it
# is normally read from the cached TSV below.
CREATE_LOGLIKELIHOODS <- F

if (CREATE_LOGLIKELIHOODS) {
  # Tabulate P(N intrusions | day, parameters) over the whole grid.
  # NOTE(review): growing `vals` with rbind inside seven nested loops is
  # O(n^2); preallocating (or building a list and rbinding once) would be
  # far faster — left as-is since the result is cached.
  vals <- NULL
  for (n in 0:max(a$CTraumatic)) {
    print(n)
    for (d in unique(intermediate$Day)) {
      for (i in unique(intermediate$I)) {
        for (c in unique(intermediate$C)) {
          for (w in unique(intermediate$W)) {
            for (g in unique(intermediate$gamma)) {
              for (r in unique(intermediate$R)) {
                p <- get_prob_by_day_5(n, d, i, c, w, g, r)
                result <- c(n, d, i, c, w, g, r, p)
                if (is.null(vals)) {
                  vals <- result
                } else {
                  vals <- rbind(vals, result)
                }
              }
            }
          }
        }
      }
    }
  }
  

  vals <- as_tibble(vals)
  names(vals) <- c("N", "Day", "I", "C", "W", "gamma", "R", "Probability")
  
  #Replace probabilities=0 with very small values 
  # (clamping into [1e-200, 1] keeps log() finite below).
  vals$Probability[vals$Probability < 1e-200] <- 1e-200
  vals$Probability[vals$Probability > 1] <- 1
  vals$LogLikelihood <- log(vals$Probability)
  write_tsv(vals, "loglikelihoods5_60.tsv")
} else {
  vals <- read_tsv("loglikelihoods5_60.tsv")
}
## Rows: 828360 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## dbl (9): N, Day, I, C, W, gamma, R, Probability, LogLikelihood
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
k <- NULL  # NOTE(review): appears unused in this file — candidate for removal.

# Maximum-likelihood parameter recovery: given an observed sequence of
# daily intrusion counts (one count per day, starting at Day 1), look up
# each day's log-likelihood rows in the global `vals` table and return the
# parameter combination with the highest summed log-likelihood.
#
# Args:
#   predicted: a one-column data frame / tibble of daily intrusion counts.
#
# Returns: the row(s) of the per-parameter likelihood table attaining the
#          maximum LogLikelihood (ties are all kept, as in the original).
parameters_mle <- function(predicted) {
  observed <- as_vector(predicted)
  # Collect the matching likelihood rows for each day at once, then bind
  # them in a single step (the original grew the table with rbind in a
  # loop, which is quadratic, and contained a dead `filtered` expression).
  per_day <- lapply(seq_along(observed), function(day) {
    filter(vals, Day == day, N == observed[day])
  })
  filtered <- do.call(rbind, per_day)
  likelihoods <- filtered %>%
    group_by(I, C, W, gamma, R) %>%
    summarise(LogLikelihood = sum(LogLikelihood))
  filter(likelihoods, LogLikelihood == max(likelihoods$LogLikelihood))
}

Example run:

# Example MLE run on a single simulated subject's first 20 post-event days.
# NOTE(review): this clobbers the earlier `test` table of trajectory counts.
test <- a %>% 
  filter(Day > 0, Day <= 20) %>%
  filter(W == 4,  
         C == 0.25, 
         gamma == 0.9, 
         I == 60, 
         R == 20, 
         A == 4, 
         Run == 2) %>% 
  dplyr::select(CTraumatic)
parameters_mle(test)
## `summarise()` has grouped output by 'I', 'C', 'W', 'gamma'. You can override using the `.groups` argument.
## # A tibble: 1 × 6
## # Groups:   I, C, W, gamma [1]
##       I     C     W gamma     R LogLikelihood
##   <dbl> <dbl> <dbl> <dbl> <dbl>         <dbl>
## 1    60   0.5     8  0.95    20         -65.9

Now let’s analyze all the runs of the model, and create the predicted parameter values for each

mle_prediction <- NULL

# Guard flag: running the MLE for every run x observation window is very
# expensive; normally the cached CSV is read in the else branch.
CREATE_MLE <- F

if (CREATE_MLE) {
  options(dplyr.summarise.inform = FALSE)
  # Vary the length of the observation window (days of data available).
  for (maxday in c(1,10,20)) { #max(a$Day)) {
    a_sub <- a %>%
      filter(Day > 0, Day <= maxday, I > 1)
    
    
    jj <- 1
    
    # For every parameter combination and run: recover the parameters by
    # MLE and record Observed vs. Prediction rows (one per parameter).
    # NOTE(review): growing mle_prediction with rbind is quadratic;
    # collecting rows in a list and binding once would be much faster.
    for (i in unique(a_sub$I)) {
      for (c in unique(a_sub$C)) {
        for (w in unique(a_sub$W)) {
          for (g in unique(a_sub$gamma)) {
            for (r in unique(a_sub$R)) {
              for (att in unique(a_sub$A)) {
                print(c(maxday, i, c, w, g, r, att))
                for (j in unique(a_sub$Run)) {
                  
                  values <- a_sub %>%
                    filter(W == w,  
                           C == c, 
                           gamma == g, 
                           I == i, 
                           R == r, 
                           A == att, 
                           Run == j) %>% 
                    dplyr::select(CTraumatic)
                  # [1,] keeps a single row even if the MLE is tied.
                  prediction <- as_vector(parameters_mle(values)[1,])
                  
                  observed <- tibble(Parameters = c("I", "C", "W", "gamma", "R"),
                                     Type = "Observed",
                                     MaxDay = maxday,
                                     Value = c(i, c, w, g, r),
                                     LogLikelihood = prediction[6],
                                     Run = jj)
                  
                  predicted  <- tibble(Parameters = c("I", "C", "W", "gamma", "R"),
                                       Type = "Prediction",
                                       MaxDay = maxday,
                                       Value = prediction[1:5],
                                       LogLikelihood = prediction[6],
                                       Run = jj)
                  
                  set <- rbind(observed, predicted)
                  #print("Done")
                  
                  if (is.null(mle_prediction)) {
                    mle_prediction <- set
                  } else {
                    mle_prediction <- rbind(mle_prediction, set)
                  }
                  jj <- jj+1
                }
              }
            }
          }
        }
      }
    }
  }
  options(dplyr.summarise.inform = T)
  # NOTE(review): the file written here ("..._01_20.csv") is NOT the file
  # read in the else branch ("..._alldays.csv"); also the variable names
  # differ (mle_prediction vs. mle_predictions) — confirm this is intended.
  write_csv(mle_prediction, "mle_predictions5_alldays_01_20.csv")
} else {
  mle_predictions <- read_csv("mle_predictions5_alldays.csv")
}
## Rows: 1512000 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Parameters, Type
## dbl (4): MaxDay, Value, LogLikelihood, Run
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scale each parameter's values to [0, 1] within each observation window
# (used for comparable plotting across parameters).
mle_predictions <- mle_predictions %>%
  group_by(Parameters, MaxDay) %>%
  mutate(ScaledValue = Value / max(Value))

# Wide format: one Observed and one Prediction column per row.
wmle_predictions <- mle_predictions %>%
  dplyr::select(-ScaledValue) %>%
  pivot_wider(values_from = "Value", names_from = "Type")

# Observed vs. predicted parameter values, faceted by window x parameter.
# NOTE(review): `..prop..` is the superseded stat-variable syntax; newer
# ggplot2 prefers after_stat(prop) — confirm the ggplot2 version in use.
ggplot(wmle_predictions, aes(x=Prediction, y=Observed)) +
  geom_count(alpha=.75, aes(size = ..prop.., col=..prop..)) +
  geom_smooth(method="lm", col="red") +
  facet_wrap(MaxDay~Parameters, scales="free", ncol=5) +
  theme_pander()
## `geom_smooth()` using formula 'y ~ x'

# Confusion-matrix percentages for backward prediction: string-valued
# observed/predicted levels, normalized within each parameter x window.
newbackpred <- wmle_predictions %>%
  mutate(SObserved = as.character(Observed),
         SPredicted = as.character(Prediction)) %>%
  ungroup() %>%
  group_by(Parameters, MaxDay) %>%
  mutate(Count = length(Run)) %>%
  ungroup() %>%
  group_by(Parameters, SPredicted, SObserved, MaxDay) %>%
  summarise(Percent = length(Count) / mean(Count))
## `summarise()` has grouped output by 'Parameters', 'SPredicted', 'SObserved'. You can override using the `.groups` argument.
# Zero-pad single-digit W levels so the string axis sorts numerically
# ("04" < "08" < "12" lexicographically).
newbackpred$SPredicted[newbackpred$SPredicted == "4"] <- "04"
newbackpred$SPredicted[newbackpred$SPredicted == "8"] <- "08"

newbackpred$SObserved[newbackpred$SObserved == "4"] <- "04"
newbackpred$SObserved[newbackpred$SObserved == "8"] <- "08"


# Heatmaps of the backward-prediction confusion matrices, one facet per
# observation window x parameter.
ggplot(newbackpred, aes(x=SPredicted, y=SObserved, fill=Percent)) +
  geom_tile(col = "white") +
  scale_fill_viridis(option="magma", end=0.8) +
  #geom_smooth(method="lm", col="red") +
  facet_wrap(MaxDay~Parameters, scales="free", ncol=5) +
  geom_text(aes(label = percent(Percent, .1)), col="white") +

  theme_pander()

# Parameter-recovery quality per parameter x observation window: exact-hit
# accuracy, RMSE, and observed-predicted correlation.
wmle_predictions %>%
  ungroup() %>%
  mutate(Accuracy = if_else(Prediction == Observed, 1, 0)) %>%
  group_by(Parameters, MaxDay) %>%
  summarise(Accuracy = mean(Accuracy),
            RMSE = sqrt(mean((Prediction - Observed)**2)),
            Correlation = cor.test(Prediction, Observed)$estimate) -> accuracies
## `summarise()` has grouped output by 'Parameters'. You can override using the `.groups` argument.
accuracies
## # A tibble: 35 × 5
## # Groups:   Parameters [5]
##    Parameters MaxDay Accuracy   RMSE Correlation
##    <chr>       <dbl>    <dbl>  <dbl>       <dbl>
##  1 C               1    0.333 0.385       0.269 
##  2 C              10    0.350 0.354       0.319 
##  3 C              20    0.374 0.337       0.363 
##  4 C              30    0.384 0.328       0.389 
##  5 C              40    0.402 0.320       0.414 
##  6 C              50    0.412 0.316       0.427 
##  7 C              59    0.414 0.313       0.437 
##  8 gamma           1    0.351 0.0973      0.0571
##  9 gamma          10    0.500 0.0761      0.357 
## 10 gamma          20    0.530 0.0725      0.412 
## # … with 25 more rows
# Chance level per parameter (1 / number of levels), repeated over the 7
# observation windows. NOTE(review): relies on accuracies being sorted by
# Parameters (C, gamma, I, R, W) with exactly 7 MaxDay rows each — fragile
# if the grouping or grid changes.
accuracies$Chance = rep(c(1/4, 1/3, 1/3, 1/2, 1/3), each = 7)

# Accuracy bars per parameter/window, with dashed chance-level lines.
ggplot(accuracies, aes(x=Parameters, y=Accuracy, fill=as_factor(MaxDay))) +
  scale_fill_aaas() +
  facet_wrap(MaxDay ~ Parameters, ncol=5, drop=TRUE, scales = "free_x") +
  geom_col(col="lightgrey") +
  geom_hline(aes(yintercept = Chance), linetype="dashed") +
  ylim(0, 0.75) +
  theme_pander()

#ggsave("alia.png")
# Compare the dispersion and central tendency of predicted vs. observed
# parameter values (a sanity check that the MLE is not systematically
# biased or over/under-dispersed).
wmle_predictions %>% 
  group_by(Parameters) %>% 
  summarise(Predicted_SD = sd(Prediction), 
            Observed_SD = sd(Observed), 
            Predicted_Mean = mean(Prediction),
            Observed_Mean = mean(Observed), 
            )
## # A tibble: 5 × 5
##   Parameters Predicted_SD Observed_SD Predicted_Mean Observed_Mean
##   <chr>             <dbl>       <dbl>          <dbl>         <dbl>
## 1 C                0.319       0.280           0.390         0.375
## 2 gamma            0.0660      0.0624          0.861         0.883
## 3 I               17.7        16.3            39.9          40    
## 4 R                9.52       10.0             6.95         10    
## 5 W                3.34        3.27            7.17          8